*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*	This program cleans the DPS match files and simulates the match
*	----------------------------------------------------------------------------
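*	IN: 	1_AT_RUN_022262012.csv (raw match file, 2012)
*			AT Runs_spring_{2013-2019}.csv (raw match files)
*			ID_Crosswalk_120819 (Excel) and missingIDs{2018-2019}_update.csv (ID crosswalks, 2018 onwards)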
*
*	MID: 	capacity_denver_{2012-2019}.txt 	(input to perl DA script)
*		 	choice_denver_{2012-2019}.txt		(input to perl DA script)
*		 	priority_denver_{2012-2019}.txt		(input to perl DA script)
*			match_denver_{2012-2019}.txt		(output from perl DA script)
*
*	OUT: 	match_{2012-2019}.dta (cleaned match files, unique on student-choice)
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	forval year = 2012/2019{

***	clean

	//load the raw match file for this year; 2012 is stored under a different file name
	local rawfile "AT Runs_spring_`year'.csv"
	if `year'==2012 local rawfile "1_AT_RUN_022262012.csv"
	insheet using "${rawdata}match/`rawfile'", clear c

**	rename

	//drop empty cols (every variable whose name starts with v; capture so that no match is not an error)
	capture drop v*

	//Harmonise variable names across the three raw layouts (2012, 2013-2017, 2018+),
	//create the lottery tiebreaker and build the program ID prg.
	if `year'==2012{
		//rename
		rename maskedstudentids 		stu
		rename familylink				famlink
		rename choiceselection			choice
		rename requestedschool			sch
		rename guischoolname			schname
		rename currentschool			currentschname

		//non-numeric lottery numbers become missing (force)
		gen tiebreaker = lotterynumber
		destring tiebreaker, replace force

		//create program ID: school_grade_capacityprogram, blanks replaced by underscores
		gen prg = strofreal(sch) + "_" + grade + "_" + capacityprogram
		replace prg = subinstr(prg," ","_",.)
	}
	else if `year'<2018{
		//rename
		rename maskedstudentnumber 		stu
		rename maskedfamilylink			famlink
		rename choiceselection			choice
		rename requestedschool			sch
		rename guischoolname			schname
		//the current-school number appears under either name; capture lets whichever is absent fail silently
		cap rename currentschool		currentsch
		cap rename currentschoolnumber 	currentsch
		rename currentschoolname		currentschname

		//set tiebreaker
		if `year' >= 2015 gen tiebreaker = lotterynumberatchoicerounded
		else gen tiebreaker = lotterynumber

		//setting missing lotteries to missing (effectively sends to back of line)
		destring tiebreaker, replace force

		//create program ID: school_grade_capacityprogram, blanks replaced by underscores
		gen prg = strofreal(sch) + "_" + grade + "_" + capacityprogram
		replace prg = subinstr(prg," ","_",.)
	}
	else{
		//switched vendors from 2018 onwards
		rename maskedfamilylink		famlink
		rename schoolname 			schname
		rename schoolnumber 		sch
		rename gradeapplying 		grade
		rename appprioritysequence 	apppriorityseq
		rename currentschoolnumber 	currentsch
		rename currentschoolname	currentschname
		rename choiceselection		choice
		rename waitlistrank 		waitinglistrank

		gen tiebreaker = lotterynumber

		//create program ID: school_grade (no capacity program component in this layout)
		gen prg = strofreal(sch) + "_" + grade
		replace prg = subinstr(prg," ","_",.)

		//enrollment ID crosswalk: maps the choice-system ID (maskedstudentid) to the DPS ID (realstu)
		preserve
			if `year'==2018 import excel using "${rawdata}match/ID_Crosswalk_120819", sheet("2018-19") firstrow clear
			if `year'==2019 import excel using "${rawdata}match/ID_Crosswalk_120819", sheet("2019-20") firstrow clear
			rename MaskedChoiceID maskedstudentid
			rename MaskedStudentID realstu
			tempfile id_xwalk
			save "`id_xwalk'"

			//some IDs were still missing from ID_Crosswalk, this file fills in some of the missings
			import delimited using "${rawdata}match/missingIDs`year'_update.csv", varnames(1) clear
			destring *, replace force
			rename maskedstudentid realstu
			rename maskedchoiceid maskedstudentid

			//append's gen() codes the data already in memory (the update file) as 0 and the
			//appended original crosswalk as 1, so build the update flag explicitly from it
			append using "`id_xwalk'", gen(src)
			gen byte update = src == 0
			drop src

			duplicates drop
			drop if mi(realstu) | realstu == 111140 //this is a placeholder ID to hold seats for students who will move into the district
			duplicates tag realstu, gen(dups)
			drop if dups & update //keep the original ID pairing when DPS ID is also present in the update file
			drop dups update
			tempfile id_xwalk_full
			save "`id_xwalk_full'"
		restore

		//keep every application row; id_merge records whether a DPS ID was found
		merge m:1 maskedstudentid using "`id_xwalk_full'", gen(id_merge) keep(1 3)

		//store the ID as a string, like the string stu expected by the ID clean-up that follows
		rename maskedstudentid stu
		tostring stu, replace
	}

**	drop

*	remove ineligible applications (priority sequence 0) and invalidated choices (status "NULL")
	keep if apppriorityseq != 0 & status != "NULL"

	//2012 only: make choice numeric, discard rows where it is not a number,
	//and scale single-digit values by 10 (shouldn't actually matter though)
	if `year'==2012{
		destring choice, replace force
		keep if !missing(choice)
		replace choice = 10*choice if choice < 10
	}

**	destring student ID

	// generate unique row identifier for stable sorting consistent with sort in raw data
	gen id = _n
	sort stu id
	//numeric copy of the ID; non-numeric IDs become missing (force) and get negative IDs below
	destring stu, gen(stunum) force
	//ID's with #VALUE! are all unique students: one negative ID per row
	replace stunum = -_n if stu == "#VALUE!"
	//some students have ID's that start with PAS: one group number per distinct remaining ID
	egen num = group(stu) if stunum == .
	//place the group-based IDs strictly below every ID assigned so far.
	//r(min) must come from an explicit summarize: without it the value is whatever an earlier
	//command happened to leave in r(), and the new IDs could overlap the -_n IDs above.
	//With no non-missing stunum, r(min) is missing and min(0,.) evaluates to 0.
	summarize stunum, meanonly
	replace stunum = min(0,r(min)) - num if stunum == .

	drop stu
	rename stunum stu

**	duplicates
	//a few kids rank the same school and program more than once and get multiple offers

	//within student-program-choice keep the rows with the smallest priority sequence
	//NOTE(review): 888 is recoded to priority 0 later in the file, but here it counts as the
	//largest value, so an 888 row loses to any other duplicate -- confirm this is intended
	bysort stu prg choice: egen best_prio = min(apppriorityseq)
	keep if apppriorityseq == best_prio
	drop best_prio

	//within student-program-priority keep the rows with the lowest choice number
	sort stu prg apppriorityseq, stable
	by stu prg apppriorityseq: egen first_choice = min(choice)
	keep if choice == first_choice
	drop first_choice

	//force any remaining drops (small # of duplicates caused by dstlink/waitinglistrank; keep raw order)
	sort stu prg apppriorityseq choice id
	duplicates drop stu prg apppriorityseq choice, force
	*if r(N_drop)>0 stop

**	define match vars

*	choices: renumber each student's choices 1,2,3,... in (choice, id) order, leaving codes 98 and 99 untouched;
*   id breaks ties so that equal choices at different programs of a school keep their raw-file order
	bysort stu (choice id): replace choice = _n if !inlist(choice,98,99)

*	offers: 1 when status is one of the accepted codes, 0 otherwise
	gen offer = 0
	replace offer = 1 if inlist(status, "A", "FLA", "Accepted")

	//check that no one gets an offer with a missing lotto number
	assert offer == 0 if tiebreaker == .

*	capacities: number of offers observed within each program-grade cell
	bysort prg grade: egen capacity = total(offer)

*	priorities
	//kids with priority 888 are guaranteed, give them priority 0; everyone else keeps the raw sequence
	gen priority = cond(apppriorityseq == 888, 0, apppriorityseq)

*	family link
	//blank out spreadsheet error codes, then convert the link to numeric
	tostring famlink, replace
	replace famlink = "" if famlink == "#VALUE!" | famlink == "#REF!"
	destring famlink, replace

	//siblings applying to the same grade get the best tiebreaker (but note we don't have access to the famlink variable for all years)
	bysort famlink grade: egen famtiebreaker = min(tiebreaker)
	replace tiebreaker = famtiebreaker if !missing(famlink)

*	offered school (some get offers to two programs at same school, so keep observations in same order as raw)
	//put each student's offered row first (offer descending, raw order within ties)
	gsort stu -offer id
	by stu: gen dps_offer = prg if _n == 1 & offer == 1
	//students without any offer are labelled with their own ID; the explicit format keeps long IDs
	//as plain digits (the default %9.0g switches to scientific notation from nine digits on)
	by stu: replace dps_offer = strofreal(stu, "%20.0f") if _n == 1 & offer == 0
	//carry the first row's value down to the student's remaining rows
	by stu: replace dps_offer = dps_offer[_n - 1]  if _n > 1

***	run the match

*	export for perl (space-delimited, no header row)

	//capacity file: one row per distinct program-capacity pair
	preserve
		keep prg capacity
		duplicates drop
		outsheet using "${cleandata}da/capacity_denver_`year'.txt", replace delimiter(" ") nonames
	restore

	//choice file: student, program, choice; id only breaks ties (e.g. repeated 98/99 codes)
	//so the row order is reproducible, and is dropped before writing
	preserve
		keep prg stu choice id
		sort stu choice id
		drop id
		order stu prg, first
		outsheet using "${cleandata}da/choice_denver_`year'.txt", replace delimiter(" ") nonames
	restore

	//priority file: program, student, priority, tiebreaker, ordered within program; id breaks ties
	preserve
		keep prg stu priority tiebreaker id
		sort prg priority tiebreaker id
		drop id
		order prg stu, first
		outsheet using "${cleandata}da/priority_denver_`year'.txt", replace delimiter(" ") nonames
	restore

*	DA perl script

	//remove any match file left by an earlier run so a failed perl call cannot be mistaken for a fresh result
	capture erase "${cleandata}da/match_denver_`year'.txt"

	shell perl "${ado}daaNYC_sims.pl" "${cleandata}da/capacity_denver_`year'.txt" "${cleandata}da/choice_denver_`year'.txt" "${cleandata}da/priority_denver_`year'.txt" "${cleandata}da/match_denver_`year'.txt"

	//stop here if the script did not write its output
	confirm file "${cleandata}da/match_denver_`year'.txt"

*	merge in results

	//read the match file written by the perl script and key it on stu
	preserve
		insheet using "${cleandata}da/match_denver_`year'.txt", clear  names  delimiter(" ")
		ren student_id stu
		tempfile match
		//quote the tempfile path so temp directories containing spaces work
		save "`match'"
	restore

	merge m:1 stu using "`match'", nogen

*	code up offer

	//from here on offer is the simulated one: 1 on the row whose program equals the student's match
	replace offer = ourmatch == prg

***	investigate

	//position of each application within its program, ordered by priority, tiebreaker, then raw order
	bysort prg (priority tiebreaker id): gen rank = _n
/*	matchdebug tiebreaker priority app if grade=="06"|grade=="6", seii(ourmatch) dist(dps_offer) ///
		stu(stu) choice(choice) rank(rank) prg(prg)
	matchdebug tiebreaker priority app if grade=="09"|grade=="9", seii(ourmatch) dist(dps_offer) ///
		stu(stu) choice(choice) rank(rank) prg(prg)
*/
***	save

	drop apppriorityseq id

	//switch to DPS ID: from 2018 on, rows without a crosswalked ID are removed and realstu replaces stu
	if `year' >= 2018{
		drop if mi(realstu)
		drop stu
		rename realstu stu
	}

	//the output must be unique on student-choice
	isid stu choice

	save "${cleandata}match_`year'.dta", replace

	}
